# set working directory to location of the R file
#setwd("replication/scripts")

require(devtools)

# need to do this due to an issue with the QTE package
# see: https://github.com/bcallaway11/qte/issues/2
remove.packages("BMisc") # might have to restart R after running this line
library(remotes)
install_version("BMisc", version="1.4.2")
packageVersion("BMisc")



require(pacman)
pacman::p_load(tidyverse, lfe, dplyr, ggplot2, ggpubr, stargazer, PanelMatch, purrr, cowplot)
#install.packages("qte")
library(qte)

# combined panel
load("../Data/combined_panel.RData")

combined_panel <- as.data.frame(combined_panel)
combined_panel$period <- as.integer(combined_panel$period)
combined_panel$log_rev <- log(combined_panel$revenue)

# check staff and bureau covariates are the same
cor(combined_panel$total_bur_filings, combined_panel$total_staff_filings)
combined_panel$log_contracts <- log(combined_panel$total_bur_filings)
combined_panel$log_lobby <- log(combined_panel$total_bur_lobbyists)

# create treatment indicator
combined_panel$treat <- ifelse(combined_panel$prev_exp == 1 | combined_panel$staff_exp == 1, 1, 0)
combined_panel$treat <- ifelse(is.na(combined_panel$treat)==T,0,combined_panel$treat)

# correlation between treatment and covariates
mod <- lm(treat ~ log_contracts, data = combined_panel)
summary(mod)

coef(mod)[2] * min(combined_panel$log_contracts)
coef(mod)[2] * max(combined_panel$log_contracts)

(coef(mod)[2] * quantile(combined_panel$log_contracts, prob = 0.75)) / (coef(mod)[2] * quantile(combined_panel$log_contracts, prob = 0.25))


coef(mod)[2] * (quantile(combined_panel$log_contracts, prob = 0.75)-
                  quantile(combined_panel$log_contracts, prob = 0.25))

# create matched sets
comb <- PanelMatch(lag = 6, 
                   time.id = "period", unit.id = "reg_lobby_core_id", 
                   treatment = "treat", 
                   #refinement.method = "none", #alternative none
                   refinement.method = "mahalanobis", #alternative none
                   data = combined_panel, 
                   match.missing = T, 
                   covs.formula = ~ I(lag(log_contracts,1:4)) + I(lag(log_lobby,1:4)) + I(lag(log_rev,1:4)), 
                   qoi = "att" ,
                   outcome.var = "log_rev",
                   lead = 0:4, 
                   forbid.treatment.reversal = T)

overview <- print(comb$att)
overview[,2] <- as.numeric(as.character(overview[,2]))
overview$id <- 1:nrow(overview)

test <- comb$att

test2 <- as.numeric(as.character(test[[1]]))

test3 <- filter(combined_panel, period %in% (overview[1,2]-6):(overview[1,2]+4) &
                  reg_lobby_core_id %in% test2 | reg_lobby_core_id == overview[1,1])

weights <- data.frame(reg_lobby_core_id = as.numeric(as.character(comb$att[[1]])),
                      weights = attributes(comb$att[[1]])[[1]])


test3$treated <- NA
test3$treated <- ifelse(test3$reg_lobby_core_id == overview[1,1], 1, 0)
test3$et <- test3$period - overview[1,2] 
test3$post <- ifelse(test3$et == 0, 1, 0)

test3 <- left_join(test3, weights, by = "reg_lobby_core_id")

test3 <- test3 %>%
  group_by(reg_lobby_core_id) %>%
  arrange(reg_lobby_core_id, period) %>%
  mutate(lag_rev = dplyr::lag(log_rev, 1, order_by = period),
         change_rev = log_rev - lag_rev)

test3$event <- 1
test3$id <- paste(test3$reg_lobby_core_id, test3$event, sep = "-")


test3 <- filter(test3, et > -3 & et < 1)


prep_qdid <- function(event_id){
  
  #crnt_id <- filter(overview, reg_lobby_core_id == id)
  crnt_id <- filter(overview, id == event_id)
  
  test <- comb$att
  
  test2 <- as.numeric(as.character(test[[event_id]]))
  
  test3 <- filter(combined_panel, period %in% (crnt_id[1,2]-6):(crnt_id[1,2]+4) &
                    reg_lobby_core_id %in% test2 | reg_lobby_core_id == crnt_id[1,1])
  
  weights <- data.frame(reg_lobby_core_id = as.numeric(as.character(comb$att[[event_id]])),
                        weights = attributes(comb$att[[event_id]])[[1]])
  
  test3$treated <- NA
  test3$treated <- ifelse(test3$reg_lobby_core_id == crnt_id[1,1], 1, 0)
  test3$et <- test3$period - crnt_id[1,2] 
  test3$post <- ifelse(test3$et == 0, 1, 0)
  
  test3 <- left_join(test3, weights, by = "reg_lobby_core_id")
  
   # test3 <- test3 %>%
   #   group_by(reg_lobby_core_id) %>%
   #   arrange(reg_lobby_core_id, period) %>%
   #   mutate(treated_firm = ifelse(treated_firm > 0 & et == 0,
   #                                , 1, 0))
  
  test3$event <- event_id
  test3$id <- paste(test3$reg_lobby_core_id, test3$event, sep = "-")
  
  test3 <- filter(test3, et > -3 & et < 1)
  
  
  # did <- data.frame(treat = test3 %>%
  #                     ungroup() %>%
  #                     filter(treated == 1 & et == 0) %>%
  #                     select(change_rev) ,   
  #                   control = test3 %>%
  #                     filter(treated == 0 & et == 0) %>%
  #                     ungroup() %>%
  #                     #summarise(av_change = mean(change_rev, na.rm=T)) %>%
  #                     summarise(av_change = weighted.mean(x = change_rev,
  #                                                         w = weights,
  #                                                         na.rm=T)) %>%
  #                     select(av_change) )
  
  return(test3)
  
}    



all_did <- map(overview[,4],
                ~ prep_qdid(.))


all_did <- do.call("rbind", all_did)

all_did <- filter(all_did, is.na(log_rev)==F)

all_did <- all_did [!duplicated(all_did[c(17,1)]),]



qd1 <- QDiD(log_rev ~ treated, t = 0, tmin1 = -1, tname = "et",
            #xformla=~ log_contracts + log_lobby,
            data = all_did, 
            idname = "reg_lobby_core_id", 
            panel = T, #se=FALSE,
            probs=seq(0.05, 0.95, 0.1), alp = 0.1)

qdd_est <- data.frame(qte = qd1$qte,
                      se = qd1$qte.se,
                      lwr = qd1$qte.lower,
                      upr = qd1$qte.upper,
                      perc = seq(0.05, 0.95, 0.1),
                      lvl =  quantile(all_did$log_rev, probs = seq(0.05, 0.95, 0.1)))


p_qdd_full <- ggplot(qdd_est, aes(x = perc, y = qte)) +
  geom_point() +
  geom_line() +
  geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.1) +
  theme_classic() +
  geom_hline(yintercept = 0, lty = 3) +
  # geom_text(aes(x = perc + 0.015, y = qte + 0.5, 
  #               label = lvl ) ) +
  labs(x = "Percentile on ln Revenue",
       y = "Effect of Connection on ln Revenue\n(Quantile Difference-in-Differences)",
       title = "A: Full Distribution")

p_qdd_full

p_qdd <- ggplot(qdd_est[-c(1),], aes(x = perc, y = qte)) +
  geom_point() +
  geom_line() +
  geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.1) +
  theme_classic() +
  geom_hline(yintercept = 0, lty = 3) +
  # geom_text(aes(x = perc + 0.015, y = qte + 0.5, 
  #               label = lvl ) ) +
  labs(x = "Percentile on ln Revenue",
       y = "Effect of Connection on ln Revenue\n(Quantile Difference-in-Differences)",
       title = "B: Excluding Low Revenue Firms")


p_qdd



p <- plot_grid(p_qdd_full, p_qdd)
p

ggsave(filename = "../images/FigureD10.pdf",
       width = 10, height = 5)

